This notebook demonstrates the Australian Water Observations from Space (WOFS) algorithm but uses Sentinel-2 instead of Landsat. This water detection algorithm is an improvement over the Landsat QA water flag or the NDWI index for water identification. For more information, visit this website:
http://www.ga.gov.au/scientific-topics/hazards/flood/wofs
This notebook uses the GA WOFS algorithm directly from https://github.com/GeoscienceAustralia/wofs
This should only need to be run once. Once it is finished, make sure that you refresh your browser and select the new 'wofs' kernel from the kernel selector at the top right.
# One-time environment setup for the 'wofs' kernel (see the note above).
# !sh ../bin/install_wofs.sh

import datacube
# Open Data Cube handle; the app name only tags this session for logging.
dc = datacube.Datacube(app='Water_Observations_from_Space')
from datacube.utils import masking
import sys, os
# Disable PyGEOS in the shapely/geopandas stack — must be set before those
# libraries are imported (TODO confirm still required in this environment).
os.environ['USE_PYGEOS'] = '0'
from pathlib import Path
import datetime
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import rioxarray
import pandas as pd
from dea_tools.plotting import rgb, display_map
from dea_tools.bandindices import calculate_indices

### EASI tools
# Make the deployment-local helper scripts importable.
sys.path.append(os.path.expanduser('../scripts'))
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils

easi = EasiDefaults() # Get the default parameters for this system
Successfully found configuration for deployment "eail"
# Start a local Dask cluster (no gateway) so the loads below stay lazy and
# chunked; 'cluster' is None when an existing client is reused.
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
display(cluster if cluster else client)
# Print a dashboard URL reachable through the JupyterHub proxy.
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))
786e1927
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 8 | Total memory: 29.00 GiB |
| Status: running | Using processes: True |
Scheduler-5bde8076-8383-48f2-b0eb-e104f0eb65ff
| Comm: tcp://127.0.0.1:43931 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 8 |
| Started: Just now | Total memory: 29.00 GiB |
| Comm: tcp://127.0.0.1:40673 | Total threads: 2 |
| Dashboard: http://127.0.0.1:39293/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:45127 | |
| Local directory: /tmp/dask-worker-space/worker-rrsn9g1z | |
| Comm: tcp://127.0.0.1:37173 | Total threads: 2 |
| Dashboard: http://127.0.0.1:34131/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:41137 | |
| Local directory: /tmp/dask-worker-space/worker-vdhnx6d0 | |
| Comm: tcp://127.0.0.1:42219 | Total threads: 2 |
| Dashboard: http://127.0.0.1:36831/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:44617 | |
| Local directory: /tmp/dask-worker-space/worker-2cn3nj_b | |
| Comm: tcp://127.0.0.1:39843 | Total threads: 2 |
| Dashboard: http://127.0.0.1:45385/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:37071 | |
| Local directory: /tmp/dask-worker-space/worker-6l6gcov8 | |
https://hub.eail.easi-eo.solutions/user/jhodge/proxy/8787/status
from datacube.utils.aws import configure_s3_access
# Forward signed AWS credentials to the Dask workers so they can read the
# product's S3 objects; requester_pays covers buckets billed to the requester.
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client)
<botocore.credentials.Credentials at 0x7f0d42594370>
# Define the Product
product = "s2_l2a"  # Sentinel-2 Level-2A surface reflectance

# Select an analysis region (Latitude-Longitude)
# Select a time period within the extents of the dataset (Year-Month-Day)

# Mombasa, Kenya
# latitude = (-4.05, -3.95)
# longitude = (39.60, 39.68)
# latitude=easi.latitude
# longitude=easi.longitude
# latitude = (36.3, 36.5)
# longitude = (-114.3, -114.5)

# For this, we will deliberately use UTM projected coordinates as it
# appears that there might be a bug in the wofs code when the area
# of interest has different sizes in the x and y dimensions
from pyproj import Proj, CRS, Transformer

crs = CRS.from_epsg(32611)  # WGS 84 / UTM zone 11N
# Transformers between geographic (lat/lon) and projected (UTM) coordinates.
to_utm = Transformer.from_crs(crs.geodetic_crs, crs)
to_latlong = Transformer.from_crs(crs, crs.geodetic_crs)
# Centre of the area of interest; the Transformer's default axis order for
# the geodetic CRS is (lat, lon).
utm = to_utm.transform(36.38,-114.4)
buffer = 12000 # set the buffer size in m (half-width of the square AOI)

# Convert back to latitudes and longitudes to visualise the area
# NOTE(review): 'topleft' is computed from (x+buffer, y-buffer), which is the
# south-east corner — the names look swapped — but only the extents of the
# resulting tuples matter to display_map, so the rendered map is correct.
topleft = to_latlong.transform(utm[0]+buffer,utm[1]-buffer)
bottomright = to_latlong.transform(utm[0]-buffer,utm[1]+buffer)
latitude = (topleft[0],bottomright[0])
longitude = (topleft[1],bottomright[1])

# Define Time Range
# NOTE(review): the line below refers to Landsat-8, but this notebook loads
# Sentinel-2 (s2_l2a), whose archive starts later (mid-2015) — verify range.
# Landsat-8 time range: 07-Apr-2013 to current
time_extents = ('2021-01-01', '2021-12-31')

# The code below renders a map that can be used to view the analysis region.
display_map(longitude,latitude)
/env/lib/python3.10/site-packages/dea_tools/plotting.py:313: FutureWarning: This function is deprecated. See: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1
all_longitude, all_latitude = transform(Proj(crs), Proj('EPSG:4326'), all_x,
# Bands needed by the WOfS classifier (RGB + NIR + both SWIRs) plus the
# Sentinel-2 scene classification layer (SCL) used for masking.
measurements = ['red', 'green', 'blue', 'nir', 'swir_1', 'swir_2', 'SCL']
# Spectral band names only — everything except the SCL mask layer.
data_names = measurements.copy()
data_names.remove('SCL')
# Lazily load the Sentinel-2 time series over the UTM extent computed above,
# one Dask chunk per time slice.
s2_dataset = dc.load(y = (utm[1]-buffer,utm[1]+buffer),
                     x = (utm[0]-buffer,utm[0]+buffer),
                     time = time_extents,
                     product = product,
                     crs = 'EPSG:32611',         # CRS of the x/y query values
                     output_crs = 'EPSG:32611',  # CRS of the returned data
                     resolution = (-10,10),      # 10 m pixels; negative y = north-up
                     measurements = measurements,
                     dask_chunks = {'time':1},
                     group_by = 'solar_day')     # merge same-day overlapping scenes
s2_dataset
<xarray.Dataset>
Dimensions: (time: 73, y: 2401, x: 2401)
Coordinates:
* time (time) datetime64[ns] 2021-01-05T18:34:07 ... 2021-12-31T18:...
* y (y) float64 4.041e+06 4.041e+06 ... 4.017e+06 4.017e+06
* x (x) float64 7.212e+05 7.212e+05 ... 7.452e+05 7.452e+05
spatial_ref int32 32611
Data variables:
red (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
green (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
blue (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nir (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
swir_1 (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
swir_2 (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
SCL (time, y, x) uint8 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
Attributes:
crs: EPSG:32611
grid_mapping: spatial_ref# Where to save the DEM fetched in ODC
# Where to save the DEM fetched in ODC
DEM_PATH = "dem_for_wofs.tif"

# Load the elevation data
from os import environ
from cartopy.crs import PlateCarree
from datacube import Datacube
from datashader import reductions
import holoviews as hv
import hvplot.xarray
import matplotlib.pyplot as plt

# Load the Copernicus 30 m DEM over the same UTM extent as the imagery.
dem = dc.load(
    product="copernicus_dem_30",
    y = (utm[1]-buffer,utm[1]+buffer),
    x = (utm[0]-buffer,utm[0]+buffer),
    crs = "epsg:32611",
    output_crs="epsg:32611",
    resolution=(-10, 10) # This is above the resolution of the DEM, but setting it to match Sentinel 2
)
# Single time slice, so drop the size-1 time dimension.
elevation = dem.elevation.squeeze()

# Have a look at the DEM data
options = {
    'title': 'Elevation',
    'frame_width': 400,
    'frame_height': 400,
    'aspect': 'equal',
    'cmap': plt.cm.terrain,
    'clim': (elevation.min().values.item(), elevation.max().values.item()), # Limit the color range
    'colorbar': True,
    'tools': ['hover'],
}
plot_crs = 'epsg:32611'
elevation.hvplot.image(
    x = 'x', y = 'y', # Dataset x,y dimension names
    crs = plot_crs,
    rasterize = True, # If False, data will not be reduced. This is slow to load but all data is loaded.
    aggregator = reductions.mean(), # Datashader calculates the mean value for reductions (also first, min, max, last, std, mode)
    precompute = True, # Datashader precomputes what it can
).opts(**options).hist(bin_range = options['clim'])

# Write the DEM to a local GeoTIFF so the WOfS classifier can read it by path.
dem_path = Path(DEM_PATH)
dem_path.parent.mkdir(parents=True, exist_ok=True)
elevation.rio.to_raster(dem_path)
from wofs.virtualproduct import WOfSClassifier

# The GA WOfS classifier expects DEA-style 'nbart_*' band names and looks for
# the pixel-quality layer under 'fmask', so map our Sentinel-2 names onto
# those (the SCL scene-classification band stands in for fmask).
band_renames = {
    "blue": "nbart_blue",
    "green": "nbart_green",
    "red": "nbart_red",
    "nir": "nbart_nir",
    "swir_1": "nbart_swir_1",
    "swir_2": "nbart_swir_2",
    "SCL": "fmask",
}
s2_dataset = s2_dataset.rename_vars(band_renames)
s2_dataset
<xarray.Dataset>
Dimensions: (time: 73, y: 2401, x: 2401)
Coordinates:
* time (time) datetime64[ns] 2021-01-05T18:34:07 ... 2021-12-31T18...
* y (y) float64 4.041e+06 4.041e+06 ... 4.017e+06 4.017e+06
* x (x) float64 7.212e+05 7.212e+05 ... 7.452e+05 7.452e+05
spatial_ref int32 32611
Data variables:
nbart_red (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nbart_green (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nbart_blue (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nbart_nir (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nbart_swir_1 (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
nbart_swir_2 (time, y, x) uint16 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
fmask (time, y, x) uint8 dask.array<chunksize=(1, 2401, 2401), meta=np.ndarray>
Attributes:
crs: EPSG:32611
grid_mapping: spatial_ref# Prepare the classifier
# Prepare the classifier.
# c2_scaling=False: inputs are Sentinel-2 L2A, not Landsat Collection-2
#   scaled surface reflectance.
# dsm_path: the DEM GeoTIFF written above, used by the terrain/shadow masking.
# dsm_no_data: nodata value assumed for that DEM — TODO confirm it matches
#   the written raster.
ts_water_classification = WOfSClassifier(c2_scaling=False,dsm_path=DEM_PATH,dsm_no_data=-32767)
# Run the classification. There might be some warnings about invalid values.
wofl = ts_water_classification.compute(s2_dataset)
/home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/terrain.py:121: RuntimeWarning: invalid value encountered in arccos sia = 90 - numpy.degrees(numpy.arccos(sia)) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/terrain.py:121: RuntimeWarning: invalid value encountered in arccos sia = 90 - numpy.degrees(numpy.arccos(sia)) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/terrain.py:121: RuntimeWarning: invalid value encountered in arccos sia = 90 - numpy.degrees(numpy.arccos(sia))
# Rename dimensions as required
wofl = wofl.rename({"x": "longitude", "y": "latitude"})

# Now categorise the data based on the classifier output.
# The 'water' band is a bitfield: value 128 (bit 7) is a clear water
# observation, 0 is a clear dry observation, and the intermediate bits are
# masking flags (cloud, shadow, terrain, etc. — presumably the standard WOfS
# bit layout; confirm against the wofs package documentation).
from odc.algo import safe_div, apply_numexpr, keep_good_only

# 'bad': any of bits 1-6 set, i.e. the observation was masked for some reason.
wofl["bad"] = (wofl.water & 0b0111_1110) > 0
# 'some': the low bits are clear, i.e. the pixel holds a usable observation
# (the <<30 >>30 trick isolates the low bits of the integer word).
wofl["some"] = apply_numexpr("((water<<30)>>30)==0", wofl, name="some")
wofl["dry"] = wofl.water == 0    # clear and not water
wofl["wet"] = wofl.water == 128  # clear and water
wofl = wofl.drop_vars("water")
# Strip nodata attrs so the reductions below treat every value as data.
for dv in wofl.data_vars.values():
    dv.attrs.pop("nodata", None)
# Run all the calculations and load into memory
wofl = wofl.compute()
/home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. 
_reproject( /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /home/jovyan/venvs/wofs/lib/python3.10/site-packages/wofs/classifier.py:81: RuntimeWarning: invalid value encountered in divide c = (a - b) / (a + b) /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject(
The images below are intended to be used to investigate issues of speckling in water bodies. See the Landsat notebook for a comparison.
# Have a look at the data
import matplotlib as mpl
from matplotlib.colors import LinearSegmentedColormap

# Discrete two-colour map: dark grey = not water, blue = water.
water_cmap = LinearSegmentedColormap.from_list('water_class', ['#32373B', '#1789FC'], N=2)

###########################################
# This code is commented out to save time #
###########################################
# fig = wofl.wet.plot(col="time", col_wrap=7, size=3, aspect=1,cmap=water_cmap)
# for ax in fig.axs.flat:
#     ax.xaxis.set_visible(False) # remove tile axes
#     ax.yaxis.set_visible(False) # remove tile axes
#     ax.set_title(ax.get_title().replace('time = ',''), fontsize=10) # clean up tile titles
# fig.cbar.ax.set_yticks(ticks=[0.25,0.75])
# fig.cbar.ax.set_yticklabels(['0 - Not water', '1 - Water'],rotation='vertical',verticalalignment='center')
# fig.cbar.ax.set_ylabel(None)
# fig

# Compare a couple of specific scenes to investigate further
date_1 = '2021-05-30' # scene with a good classification
date_2 = '2021-12-11' # scene with a poor classification

# 2x2 grid: true-colour scene on the left, its water mask on the right.
fig = plt.figure(figsize=(14, 14))
ax1 = fig.add_subplot(2,2,1, aspect = "equal")
ax2 = fig.add_subplot(2,2,2, aspect = "equal")
ax3 = fig.add_subplot(2,2,3, aspect = "equal")
ax4 = fig.add_subplot(2,2,4, aspect = "equal")
true_1 = s2_dataset[['nbart_red','nbart_green','nbart_blue']].sel(time=date_1,method='nearest').to_array().plot.imshow(ax=ax1,robust=True)
wet_1 = wofl.wet.sel(time=date_1,method='nearest').plot(ax=ax2,add_colorbar=False,cmap=water_cmap)
true_2 = s2_dataset[['nbart_red','nbart_green','nbart_blue']].sel(time=date_2,method='nearest').to_array().plot.imshow(ax=ax3,robust=True)
wet_2 = wofl.wet.sel(time=date_2,method='nearest').plot(ax=ax4,add_colorbar=False,cmap=water_cmap)
# Hide the axes so only the descriptive titles remain on each tile.
ax1.set_title(f'True Color - good classification ({date_1})'), ax1.xaxis.set_visible(False), ax1.yaxis.set_visible(False)
ax2.set_title(f'Water classification - good classification ({date_1})'), ax2.xaxis.set_visible(False), ax2.yaxis.set_visible(False)
ax3.set_title(f'True Color - poor classification ({date_2})'), ax3.xaxis.set_visible(False), ax3.yaxis.set_visible(False)
ax4.set_title(f'Water classification - poor classification ({date_2})'), ax4.xaxis.set_visible(False), ax4.yaxis.set_visible(False)
# Forcing the colorbar to be added separately so that it doesn't change the figure sizes
ax_cbar1 = fig.add_axes([1, 0.4875, 0.02, 0.4625])
ax_cbar2 = fig.add_axes([1, 0, 0.02, 0.4625])
cbar1 = fig.colorbar(wet_1,cax=ax_cbar1,ticks=[0.25,0.75])
cbar1.ax.set_yticklabels(['0 - Not water', '1 - Water'],rotation='vertical',verticalalignment='center')
cbar2 = fig.colorbar(wet_2,cax=ax_cbar2,ticks=[0.25,0.75])
cbar2.ax.set_yticklabels(['0 - Not water', '1 - Water'],rotation='vertical',verticalalignment='center')
plt.subplots_adjust(left=0,bottom=0,right=0.95,top=0.95,wspace=0.05,hspace=0.05) # tight_layout() doesn't work when using add_axes()
plt.show()
Compared to Landsat, the issue of speckling in the lake appears to be less pronounced, but this example still shows some issues.
# Helper function from https://github.com/opendatacube/odc-stats/blob/develop/odc/stats/plugins/wofs.py
def reduce(xx: xr.Dataset) -> xr.Dataset:
    """Collapse the per-scene WOfS layers into per-pixel summary statistics.

    Adapted from odc-stats' wofs plugin.

    Parameters
    ----------
    xx : xr.Dataset
        Boolean 'some', 'wet' and 'dry' variables over (time, y, x).

    Returns
    -------
    xr.Dataset
        'count_wet' and 'count_clear' (int16, nodata=-999 where a pixel was
        never observed) and 'frequency' (float32 wet / clear ratio).
    """
    NODATA = -999
    n_observed = xx.some.sum(axis=0, dtype="int16")
    n_wet = xx.wet.sum(axis=0, dtype="int16")
    n_dry = xx.dry.sum(axis=0, dtype="int16")
    n_clear = n_wet + n_dry
    wet_fraction = safe_div(n_wet, n_clear, dtype="float32")
    # Tag the integer layers with a nodata value before masking them.
    n_wet.attrs["nodata"] = NODATA
    n_clear.attrs["nodata"] = NODATA
    # Pixels with no valid observation at all become nodata.
    ever_observed = n_observed > 0
    n_wet = keep_good_only(n_wet, ever_observed)
    n_clear = keep_good_only(n_clear, ever_observed)
    return xr.Dataset({
        "count_wet": n_wet,
        "count_clear": n_clear,
        "frequency": wet_fraction,
    })

summary = reduce(wofl)
summary
<xarray.Dataset>
Dimensions: (latitude: 2401, longitude: 2401)
Coordinates:
* latitude (latitude) float64 4.041e+06 4.041e+06 ... 4.017e+06 4.017e+06
* longitude (longitude) float64 7.212e+05 7.212e+05 ... 7.452e+05 7.452e+05
spatial_ref int32 32611
Data variables:
count_wet (latitude, longitude) int16 1 1 1 1 1 1 1 1 ... 0 0 0 0 0 0 0 1
count_clear (latitude, longitude) int16 73 73 73 73 73 73 ... 0 0 0 0 73 73
frequency (latitude, longitude) float32 0.0137 0.0137 ... 0.0 0.0137from matplotlib.cm import jet_r
# Render masked (nodata) pixels in black.
jet_r.set_bad('black',1)

# Plot of wet counts
# Areas of RED have experienced little or no water over the time series
# Areas of BLUE have experienced significant or constant water over the time series
summary.count_wet.plot(size=10,cmap = jet_r);
plt.title("Count of Samples Classified as Water")
plt.axis('off')
plt.show()

# Plot of clear counts
summary.count_clear.plot(size=10,cmap = jet_r);
# BUG FIX: this panel shows clear (cloud-free) observation counts, not water
# counts — the title was a copy-paste of the previous plot's.
plt.title("Count of Clear (Cloud-Free) Samples")
plt.axis('off')
plt.show()

# Plot of wet frequency (percentage of clear observations that were wet)
(summary.frequency*100).plot(cmap = jet_r, size=10)
plt.title("Percent of Samples Classified as Water")
plt.axis('off')
plt.show()